###This code creates the cleaned panel survey dataset used for the key field experimental results for "Competence versus Priorities: Negative Electoral Responses to Education Quality in Brazil"
###Authors: Taylor Boas, F. Daniel Hidalgo, Guillermo Toral
###Date: June 23, 2020
#R Version, Package Versions, and Session Information Located at the Bottom of File

#Set Working Directory to directory containing this file and data subdirectory.

# ---- setup ----
##Need the following packages: "tidyverse", "forcats", "haven", "janitor", "knitr"
# NOTE(review): only tidyverse, forcats and haven are attached below, yet the
# script later calls tabyl(), crosstab() and adorn_crosstab() unqualified --
# presumably janitor is attached by data_cleaning_functions.R; confirm.
#set working directory
# setwd(".") keeps the current directory, so the script has to be started from
# the directory that contains this file and the ./data subdirectory.
setwd(".")
library('tidyverse'); library('forcats'); library("haven")
source('data_cleaning_functions.R') #Functions for data cleaning

# ---- import_data ----
 ## Data from the survey firm

# Set the readr.num_columns option to 0 before the read_csv() calls below
# (NOTE(review): presumably to silence the column-specification printout).
options("readr.num_columns" = 0)
# Raw SPSS exports of the baseline (panel1) and endline (panel2) waves.
panel1_full <- read_spss('./data/Banco de Dados - Projeto Boston University (3193 CASOS).sav')
panel2_full <- read_spss('./data/Banco Boston University (2577 casos).sav')

#Import questionnaire data
# Baseline questionnaire sheet: respondent id (qnum), municipality, IBGE code,
# census tract (codesetor, forced to character) and treatment group.
quest <- read_csv('./data/data_questionnaire_panel.csv',
                 locale = locale(encoding="latin1"),
                  col_types = cols(codesetor = col_character())) %>%
  select(qnum, muni, codeibge, codesetor, group)
# Endline questionnaire sheet: same identifiers plus the endline randomization
# columns (list1, list2, carlos, flier, area1, area2).  codesetor is left to
# readr's type guessing here.
quest_endline <- read_csv('./data/data_questionnaire_panel_r2.csv',
                          locale = locale(encoding = "latin1")) %>%
  select(qnum, codeibge, muni, codesetor, group, list1, list2, carlos, flier, area1, area2)

##Municipality Data
# Municipality-level columns, with status and edu_rank renamed to the names
# used in the rest of the script (acc_status, ana_rank).
muni <- read_csv("./data/panel_muni_sample_results.csv",
                locale = locale(encoding = "latin1")) %>%
  select(codeibge, incumbent_wins, status, edu_rank) %>%
  rename(acc_status = status,
         ana_rank = edu_rank)


#  ---- rename_vars ----
# Give the baseline SPSS items descriptive names, then drop every remaining
# p<number> column together with muni, grupo, bairro and setor.
# NOTE(review): p8 is listed under two new names in the rename() call
# (same_race and same_race_mayor).  same_race is re-created from the raw p8
# column right after the pipeline -- presumably because only one of the two
# names survives the rename; confirm.
panel1_nolabels <- panel1_full %>%
  rename(sex = p1, age = p2, politics_interest = p5, turnout_2012 = p6, vote_2012 = p7, partisan = p11,
         muni_biggest_prob = p13, edu_contas_rank = p16, govt_eval_baseline = p18, acc_eval_baseline = p19,
         uncertain_acc_baseline= p20, edu_eval_baseline = p21, uncertain_edu_baseline= p22,
         tce_knowledge = p23, child_school = p27,
         acc_responsible = p32, confid_fedgov = p28, confid_justice = p29, confid_tce = p30,
         confid_muni = p31, edu_responsible = p33, prob_vote_buying = p34,
         acc_rejected_prior = p39, tce_prior_cert = p40, ana_prior = p41,
         edu_prior_uncert =  p42, treat_quest = p43, years_edu = p44, race = p45, religion = p46,
         relative_wellbeing = p48, income = p47, same_race = p8, turnout_2014 = p9, vote_2014 = p10,
         politician_helped = p14, prob_vote_monitoring = p35, prob_vote_count = p36, ana_knowledge = p25,
         info_welfare = p3701, same_race_mayor = p8, source_cred1 = p3801, mayor_pid = p17,
         pid = p12
         ) %>%
  select(-num_range("p", range = 1:4000), -muni, -grupo, -bairro, -setor)
panel1_nolabels$same_race <- panel1_full$p8

# Attach the municipality columns to the questionnaire sheet; no `by` is given,
# so the join uses the columns the two tables share.
quest <- left_join(quest, muni)
# Strip the SPSS display attributes from the respondent id before it is used
# as a join key.
attr(panel1_nolabels$quest, 'format.spss') <- NULL
attr(panel1_nolabels$quest, "display_width") <- NULL
# Keep every questionnaire row; survey answers are matched on qnum == quest.
panel1_nolabels <- left_join(quest, panel1_nolabels, by = c('qnum' = 'quest'))

# Give the endline SPSS items descriptive names.  The remaining p<number>
# columns are kept for now because the question-order code further down still
# reads them (see the commented-out select() below).
# NOTE(review): p25 is listed under two new names (campaign_edu and
# acc_backlash); only acc_backlash is referenced later in this script.
# `quest = quest` renames the column to itself.
panel2_nolabels <- panel2_full %>%
  rename(quest = quest, turnout = p3, vote_choice = p4, list_exp_vote = p6, vote_vig = p35,
         govt_eval = p12,
         ana_posterior = p23,
         acc_rejected_posterior = p22, decision_factors = p7,
         heard_others_flier = p32,
         top3_1 = p801, top3_2 = p802, top3_3 = p803,
         received_flier = p29, flier_openended1 = p3401, flier_openended2 = p3402,
         flier_openended3 = p3403, good_schools1 = p2101, good_schools2 = p2102,
         good_schools3 = p2103, campaign_edu = p25, ideb_heard = p26,
         ideb_heard_from1 = p2701, ideb_heard_from2 = p2702, ideb_heard_from3 = p2703,
         ideb_goal_met = p28, pol_effort = p19, pol_honesty = p20, acc_backlash = p25,
         campaign_issue = p10, rejected_should_rerun = p36)
#  select(-num_range("p", range = 1:4000), -p33a)
# Strip the SPSS display attributes from the respondent id before joining.
attr(panel2_nolabels$quest, 'format.spss') <- NULL
attr(panel2_nolabels$quest, "display_width") <- NULL
# Add municipality columns to the endline questionnaire sheet, then keep every
# questionnaire row and match endline answers on qnum == quest.  Respondents
# without an endline interview therefore stay in the data with NA answers.
quest_endline <- left_join(quest_endline, muni)
panel2_nolabels <- left_join(quest_endline, panel2_nolabels, by = c('qnum' = 'quest'))


###Deal with Randomization of Question Order
# area1 and area2 name the policy area ("escolas municipais" or
# "contas do município") attached to each of the two evaluation batteries.
# Items p13, p14 and p1501 hold the answers for the area named in area1;
# items p16, p17 and p1801 hold the answers for the area named in area2.
# The code below re-assembles them into one column per area
# (edu = municipal schools, acc = municipal accounts).
#
# The area labels and row masks are defined once so that every assignment uses
# exactly the same spelling; a misspelled label silently matches no rows and
# leaves the target column NA.
edu_area <- "escolas municipais"
acc_area <- "contas do município"
edu_in_area1 <- panel2_nolabels$area1 == edu_area
edu_in_area2 <- panel2_nolabels$area2 == edu_area
acc_in_area1 <- panel2_nolabels$area1 == acc_area
acc_in_area2 <- panel2_nolabels$area2 == acc_area

# Target columns are created in this order so the column order of the data
# set is unchanged.
panel2_nolabels$uncertain_edu_eval <- NA
panel2_nolabels$uncertain_acc_eval <- NA
panel2_nolabels$uncertain_edu_open <- NA
panel2_nolabels$uncertain_acc_open <- NA

# Items p14 (area1 battery) and p17 (area2 battery)
panel2_nolabels$uncertain_edu_eval[edu_in_area1] <- panel2_nolabels$p14[edu_in_area1]
panel2_nolabels$uncertain_edu_eval[edu_in_area2] <- panel2_nolabels$p17[edu_in_area2]
panel2_nolabels$uncertain_acc_eval[acc_in_area1] <- panel2_nolabels$p14[acc_in_area1]
panel2_nolabels$uncertain_acc_eval[acc_in_area2] <- panel2_nolabels$p17[acc_in_area2]

# Items p1501 (area1 battery) and p1801 (area2 battery)
panel2_nolabels$uncertain_edu_open[edu_in_area1] <- panel2_nolabels$p1501[edu_in_area1]
panel2_nolabels$uncertain_edu_open[edu_in_area2] <- panel2_nolabels$p1801[edu_in_area2]
panel2_nolabels$uncertain_acc_open[acc_in_area1] <- panel2_nolabels$p1501[acc_in_area1]
panel2_nolabels$uncertain_acc_open[acc_in_area2] <- panel2_nolabels$p1801[acc_in_area2]


# Items p13 (area1 battery) and p16 (area2 battery)
panel2_nolabels$edu_eval <- NA
panel2_nolabels$acc_eval <- NA
panel2_nolabels$edu_eval[edu_in_area1] <- panel2_nolabels$p13[edu_in_area1]
panel2_nolabels$edu_eval[edu_in_area2] <- panel2_nolabels$p16[edu_in_area2]
panel2_nolabels$acc_eval[acc_in_area1] <- panel2_nolabels$p13[acc_in_area1]
panel2_nolabels$acc_eval[acc_in_area2] <- panel2_nolabels$p16[acc_in_area2]

# Code -> label lookup tables built from the SPSS value labels of p14, p1501
# and p13.  They are joined back onto the re-assembled columns later, so they
# must be captured before the p<number> columns are dropped.
uncertainty_labels <-  attr(panel2_nolabels$p14, "labels")
uncertainty_labels <- tibble(uncertainty_lab = names(uncertainty_labels),
                                uncertainty_codes = uncertainty_labels)
uncertainty_open_labels <- attr(panel2_nolabels$p1501, "labels")
uncertainty_open_labels <- tibble(uncertainty_open_lab = names(uncertainty_open_labels),
                                  uncertainty_open_codes = uncertainty_open_labels)
eval_labels <-  attr(panel2_nolabels$p13, "labels")
eval_labels <- tibble(eval_lab = names(eval_labels),
                         eval_codes = eval_labels)
# The raw p<number> items (and p33a) are no longer needed.
panel2_nolabels <- select(panel2_nolabels, -num_range("p", range = 1:4000), -p33a)

# ---- label_baseline ----
##Baseline Panel

# Build panel1 from panel1_nolabels, one variable at a time.
# label_impute_nsnr(), label_nsnr(), impute(), recode99() and factor2num() are
# defined in data_cleaning_functions.R (not shown here).  NOTE(review): judging
# by their names, label_* convert SPSS codes to their labels, the *_impute_*
# variant additionally fills in "don't know / no answer" responses, and
# factor2num() converts a labelled factor to numbers -- confirm against that
# file.  Variables with a *_noimpute twin keep a non-imputed copy alongside.
panel1 <- panel1_nolabels
panel1$sex <- label_impute_nsnr(panel1_nolabels$sex)
# Age is imputed within census tract (codesetor).
panel1$age <- impute(panel1_nolabels$age, panel1_nolabels$codesetor)
summary(panel1$age)
panel1$politics_interest <- label_impute_nsnr(panel1_nolabels$politics_interest)
# NOTE(review): the next two statements are the same assignment written twice.
panel1$turnout_2012  <- label_impute_nsnr(panel1_nolabels$turnout_2012 )
panel1$turnout_2012 <- label_impute_nsnr(panel1_nolabels$turnout_2012)
panel1$vote_2012 <- label_impute_nsnr(panel1_nolabels$vote_2012)
panel1$partisan <- label_impute_nsnr(panel1_nolabels$partisan)
panel1$muni_biggest_prob_noimpute <- label_nsnr(panel1_nolabels$muni_biggest_prob)
panel1$muni_biggest_prob <- label_impute_nsnr(panel1_nolabels$muni_biggest_prob, crosstabs = FALSE)
panel1$edu_contas_rank <- label_impute_nsnr(panel1_nolabels$edu_contas_rank)
panel1$govt_eval_baseline <- label_impute_nsnr(panel1_nolabels$govt_eval_baseline)
panel1$govt_eval_baseline_noimpute <- label_nsnr(panel1_nolabels$govt_eval_baseline)
panel1$acc_eval_baseline <- label_impute_nsnr(panel1_nolabels$acc_eval_baseline)
# recode_nsnr = FALSE: the "Não sabe / Não respondeu" answer is recoded by hand
# in the recode_vars_baseline section.
panel1$uncertain_acc_baseline <- label_impute_nsnr(panel1_nolabels$uncertain_acc_baseline, recode_nsnr = FALSE)
panel1$edu_eval_baseline <- label_impute_nsnr(panel1_nolabels$edu_eval_baseline)
panel1$uncertain_edu_baseline <- label_impute_nsnr(panel1_nolabels$uncertain_edu_baseline, recode_nsnr = FALSE)
panel1$tce_knowledge <- label_impute_nsnr(panel1_nolabels$tce_knowledge)
panel1$child_school <- label_impute_nsnr(panel1_nolabels$child_school)
panel1$acc_responsible <- label_impute_nsnr(panel1_nolabels$acc_responsible)
panel1$confid_fedgov <- label_impute_nsnr(panel1_nolabels$confid_fedgov)
panel1$confid_justice <- label_impute_nsnr(panel1_nolabels$confid_justice)
panel1$confid_tce <- label_impute_nsnr(panel1_nolabels$confid_tce)
panel1$confid_muni <- label_impute_nsnr(panel1_nolabels$confid_muni)
panel1$confid_fedgov_noimpute <- label_nsnr(panel1_nolabels$confid_fedgov)
panel1$confid_justice_noimpute <- label_nsnr(panel1_nolabels$confid_justice)
panel1$confid_tce_noimpute <- label_nsnr(panel1_nolabels$confid_tce)
panel1$confid_muni_noimpute <- label_nsnr(panel1_nolabels$confid_muni)
panel1$edu_responsible <- label_impute_nsnr(panel1_nolabels$edu_responsible)
panel1$prob_vote_buying <- label_impute_nsnr(panel1_nolabels$prob_vote_buying)
panel1$acc_rejected_prior <- label_impute_nsnr(panel1_nolabels$acc_rejected_prior)
panel1$tce_prior_cert <- label_impute_nsnr(panel1_nolabels$tce_prior_cert)
panel1$ana_prior <- label_impute_nsnr(panel1_nolabels$ana_prior, crosstabs = FALSE) %>%
  factor2num()
panel1$edu_prior_uncert <- label_impute_nsnr(panel1_nolabels$edu_prior_uncert)
panel1$treat_quest <- label_impute_nsnr(panel1_nolabels$treat_quest)
# Years of education: passed through recode99() and then imputed within
# municipality (codeibge), unlike age, which is imputed within census tract.
panel1$years_edu <- impute(recode99(as.numeric(panel1_nolabels$years_edu)), panel1_nolabels$codeibge)
summary(panel1$years_edu)
panel1$race <- label_impute_nsnr(panel1_nolabels$race)
panel1$religion <- label_impute_nsnr(panel1_nolabels$religion)
panel1$relative_wellbeing <- label_impute_nsnr(panel1_nolabels$relative_wellbeing)
panel1$income <- label_impute_nsnr(panel1_nolabels$income)
panel1$same_race <- label_impute_nsnr(panel1_nolabels$same_race)
panel1$turnout_2014 <- label_impute_nsnr(panel1_nolabels$turnout_2014)
panel1$vote_2014 <- label_impute_nsnr(panel1_nolabels$vote_2014)
panel1$politician_helped <- label_impute_nsnr(panel1_nolabels$politician_helped)
panel1$prob_vote_monitoring <- label_impute_nsnr(panel1_nolabels$prob_vote_monitoring)
panel1$prob_vote_count <- label_impute_nsnr(panel1_nolabels$prob_vote_count)
panel1$ana_knowledge <- label_impute_nsnr(panel1_nolabels$ana_knowledge)
panel1$info_welfare <- label_impute_nsnr(panel1_nolabels$info_welfare)
# The last four read from panel1 rather than panel1_nolabels; these columns
# have not been modified in panel1 yet, so the input is the same raw column.
panel1$same_race_mayor <- label_impute_nsnr(panel1$same_race_mayor)
panel1$source_cred1 <- label_impute_nsnr(panel1$source_cred1)
panel1$mayor_pid <- label_impute_nsnr(panel1$mayor_pid)
panel1$pid <- label_nsnr(panel1$pid)

# ---- label_endline ----

##Endline Panel
# Build panel2 from panel2_nolabels.  Endline variables go through
# label_nsnr() only (from data_cleaning_functions.R, not shown); no
# label_impute_nsnr() call is made on endline variables in this file.
panel2 <- panel2_nolabels
panel2$turnout <- label_nsnr(panel2_nolabels$turnout)
panel2$vote_choice <- label_nsnr(panel2_nolabels$vote_choice)
#The following change is made due to a digitization error in the original data
panel2$vote_choice[panel2$qnum == 702] <- "Branco/Nulo"
# Code 9 in the list-experiment item is set to missing.
panel2$list_exp_vote[panel2$list_exp_vote == 9] <- NA
tabyl(panel2$list_exp_vote)
panel2$decision_factors <- label_nsnr(panel2_nolabels$decision_factors)
panel2$top3_1 <- label_nsnr(panel2_nolabels$top3_1)
panel2$top3_2 <- label_nsnr(panel2_nolabels$top3_2)
panel2$top3_3 <- label_nsnr(panel2_nolabels$top3_3)
panel2$govt_eval <- label_nsnr(panel2_nolabels$govt_eval)

# The six columns re-assembled from the randomized batteries carry bare codes.
# Each one is replaced by its text label using the same recipe:
#   1. drop the coded column from panel2;
#   2. look the codes up in the matching label table
#      (uncertainty_labels, uncertainty_open_labels or eval_labels);
#   3. join the labelled column back onto panel2 (no `by` is given, so the
#      join uses the columns the two tables share).
# tabyl() calls before and after print the distribution for inspection.
tabyl(panel2$uncertain_edu_eval)
panel2$uncertain_edu_eval <- NULL
uncertain_edu_eval <- left_join(select(panel2_nolabels, qnum, uncertain_edu_eval),
                               uncertainty_labels, by = c("uncertain_edu_eval" = "uncertainty_codes")) %>%
  select(-uncertain_edu_eval) %>%
  rename(uncertain_edu_eval = uncertainty_lab)
panel2 <- left_join(panel2, uncertain_edu_eval)
tabyl(panel2$uncertain_edu_eval)

tabyl(panel2$uncertain_acc_eval)
panel2$uncertain_acc_eval <- NULL
uncertain_acc_eval <- left_join(select(panel2_nolabels, qnum, uncertain_acc_eval),
                               uncertainty_labels, by = c("uncertain_acc_eval" = "uncertainty_codes")) %>%
  select(-uncertain_acc_eval) %>%
  rename(uncertain_acc_eval = uncertainty_lab)
panel2 <- left_join(panel2, uncertain_acc_eval)
tabyl(panel2$uncertain_acc_eval)



# Open-ended items use their own label table.
panel2$uncertain_edu_open <- NULL
uncertain_edu_open <- left_join(select(panel2_nolabels, qnum, uncertain_edu_open),
                                uncertainty_open_labels, by = c( "uncertain_edu_open" =
                                                           "uncertainty_open_codes" )) %>%
  select(-uncertain_edu_open) %>%
  rename(uncertain_edu_open = uncertainty_open_lab)
panel2 <- left_join(panel2, uncertain_edu_open)
tabyl(panel2$uncertain_edu_open, sort = TRUE)


panel2$uncertain_acc_open <- NULL
uncertain_acc_open <- left_join(select(panel2_nolabels, qnum, uncertain_acc_open),
                                uncertainty_open_labels, by = c( "uncertain_acc_open" =
                                                           "uncertainty_open_codes" )) %>%
  select(-uncertain_acc_open) %>%
  rename(uncertain_acc_open = uncertainty_open_lab)
panel2 <- left_join(panel2, uncertain_acc_open)
tabyl(panel2$uncertain_acc_open, sort = TRUE)

# Evaluation items use eval_labels.
tabyl(panel2$edu_eval)
panel2$edu_eval <- NULL
edu_eval <- left_join(select(panel2_nolabels, qnum, edu_eval),
                               eval_labels, by = c("edu_eval" = "eval_codes")) %>%
  select(-edu_eval) %>%
  rename(edu_eval = eval_lab)
panel2 <- left_join(panel2, edu_eval)
tabyl(panel2$edu_eval)

tabyl(panel2$acc_eval)
panel2$acc_eval <- NULL
acc_eval <- left_join(select(panel2_nolabels, qnum, acc_eval),
                               eval_labels, by = c("acc_eval" = "eval_codes")) %>%
  select(-acc_eval) %>%
  rename(acc_eval = eval_lab)
panel2 <- left_join(panel2, acc_eval)
tabyl(panel2$acc_eval)


# Label the remaining endline items.  Most follow the same pattern: print the
# raw distribution, replace the column with label_nsnr(<raw column>), print
# the labelled distribution.
panel2$acc_rejected_posterior <- label_nsnr(panel2_nolabels$acc_rejected_posterior)
panel2$ana_posterior <- factor2num(label_nsnr(panel2_nolabels$ana_posterior, crosstabs = FALSE))
summary(panel2$ana_posterior)
panel2$received_flier <- label_nsnr(panel2_nolabels$received_flier)
# vote_vig_cont keeps the raw vote_vig codes with code 9 set to missing;
# vote_vig itself is replaced by its labels.
panel2$vote_vig_cont <- panel2_nolabels$vote_vig
panel2$vote_vig_cont[panel2$vote_vig_cont == 9] <- NA
panel2$vote_vig <- label_nsnr(panel2_nolabels$vote_vig)
panel2$heard_others_flier <- label_nsnr(panel2_nolabels$heard_others_flier)

tabyl(panel2$flier_openended1)
panel2$flier_openended1 <- (label_nsnr(panel2_nolabels$flier_openended1))

tabyl(panel2$flier_openended2, sort = TRUE)
panel2$flier_openended2 <- (label_nsnr(panel2_nolabels$flier_openended2))
tabyl(panel2$flier_openended2, sort = TRUE)

tabyl(panel2$flier_openended3)
panel2$flier_openended3 <- (label_nsnr(panel2_nolabels$flier_openended3))
tabyl(panel2$flier_openended3, sort = TRUE)

tabyl(panel2$good_schools1)
panel2$good_schools1 <- label_nsnr(panel2_nolabels$good_schools1)
tabyl(panel2$good_schools1)


tabyl(panel2$good_schools2)
panel2$good_schools2 <- label_nsnr(panel2_nolabels$good_schools2)
tabyl(panel2$good_schools2)

tabyl(panel2$good_schools3)
panel2$good_schools3 <- label_nsnr(panel2_nolabels$good_schools3)
tabyl(panel2$good_schools3)

tabyl(panel2$ideb_heard)
panel2$ideb_heard <- label_nsnr(panel2_nolabels$ideb_heard)
tabyl(panel2$ideb_heard)


tabyl(panel2$ideb_heard_from1)
panel2$ideb_heard_from1 <- label_nsnr(panel2_nolabels$ideb_heard_from1)
tabyl(panel2$ideb_heard_from1)

tabyl(panel2$ideb_heard_from2)
panel2$ideb_heard_from2 <- label_nsnr(panel2_nolabels$ideb_heard_from2)
tabyl(panel2$ideb_heard_from2)

tabyl(panel2$ideb_heard_from3)
panel2$ideb_heard_from3 <- label_nsnr(panel2_nolabels$ideb_heard_from3)
tabyl(panel2$ideb_heard_from3)

# recode_nsnr = FALSE is passed for this item, unlike most items above.
tabyl(panel2$ideb_goal_met)
panel2$ideb_goal_met <- label_nsnr(panel2_nolabels$ideb_goal_met, recode_nsnr = FALSE)
tabyl(panel2$ideb_goal_met)

tabyl(panel2$pol_effort)
panel2$pol_effort <- label_nsnr(panel2_nolabels$pol_effort)
tabyl(panel2$pol_effort)

# pol_honesty is stored twice: *_noimpute with recode_nsnr = FALSE, and the
# main column with the helper's default.
tabyl(panel2$pol_honesty)
panel2$pol_honesty_noimpute <- label_nsnr(panel2_nolabels$pol_honesty, recode_nsnr = FALSE)
panel2$pol_honesty <- label_nsnr(panel2_nolabels$pol_honesty)
tabyl(panel2$pol_honesty)

tabyl(panel2$acc_backlash)
panel2$acc_backlash <- label_nsnr(panel2_nolabels$acc_backlash)
tabyl(panel2$acc_backlash)

tabyl(panel2$campaign_issue)
panel2$campaign_issue <- label_nsnr(panel2_nolabels$campaign_issue)
tabyl(panel2$campaign_issue)

tabyl(panel2$rejected_should_rerun)
panel2$rejected_should_rerun <- label_nsnr(panel2_nolabels$rejected_should_rerun)
tabyl(panel2$rejected_should_rerun)

# ---- recode_vars_baseline ----


## Baseline Survey

## Independent Variables
# Treatment-arm indicators derived from the questionnaire `group` column.
panel1$ana <- ifelse(panel1$group == "(3) Educação", 1, 0)
tabyl(panel1$ana)
panel1$accounts <- ifelse(panel1$group == "(2) Gestão Financeira", 1, 0)
tabyl(panel1$accounts)
# 1 when the municipality's accounts status is "REJEIÇÃO".
panel1$acc_rejected <- ifelse(panel1$acc_status == "REJEIÇÃO", 1, 0)
tabyl(panel1$acc_rejected)
# 1 when the respondent's baseline answer was "Rejeitadas".
panel1$acc_rejected_prior <- ifelse(panel1$acc_rejected_prior == "Rejeitadas", 1, 0)
tabyl(panel1$acc_rejected_prior)
panel1$control <- ifelse(panel1$group == "(1) Controle", 1, 0)
tabyl(panel1$control)
# good_ana = 1 when the municipality's rank is numerically lower than the
# respondent's prior, or when the two are equal and the rank is at most 93.
# NOTE(review): 93 is hard-coded; presumably the midpoint of the ranking
# (184 is used as the scale constant for ana_correct later) -- confirm.
panel1$good_ana <- ifelse((panel1$ana_rank < panel1$ana_prior) |
                          ((panel1$ana_rank == panel1$ana_prior) &
                           (panel1$ana_rank <= 93)), 1, 0)
tabyl(panel1$good_ana)
# Complement of the respondent's prior that the accounts were rejected.
panel1$good_accounts <- 1 - panel1$acc_rejected_prior
tabyl(panel1$good_accounts)



##Moderators
# Yes/no items become 0/1 indicators (1 = "Sim").
panel1$child_school <- ifelse(panel1$child_school == "Sim", 1, 0)
tabyl(panel1$child_school)
# "Não sabe / Não respondeu" is recoded to "7" before the labels are turned
# into numbers with factor2num().
# NOTE(review): this relies on the column type returned by label_impute_nsnr()
# accepting "7" as a value -- confirm in data_cleaning_functions.R.
panel1$uncertain_acc_baseline[panel1$uncertain_acc_baseline== "Não sabe / Não respondeu"] <- "7"
panel1$uncertain_acc_baseline <- factor2num(panel1$uncertain_acc_baseline)
tabyl(panel1$uncertain_acc_baseline)
panel1$uncertain_edu_baseline[panel1$uncertain_edu_baseline == "Não sabe / Não respondeu"] <- "7"
panel1$uncertain_edu_baseline <- factor2num(panel1$uncertain_edu_baseline)
tabyl(panel1$uncertain_edu_baseline)

## Demographic Variables
panel1$female <- ifelse(panel1$sex == "Mulher", 1, 0)
tabyl(panel1$female)

##Electoral Variables
# 1 when the respondent reports having voted for the current mayor in 2012.
panel1$vote_2012 <- ifelse(panel1$vote_2012 == "Votou no prefeito atual", 1, 0)
tabyl(panel1$vote_2012)
panel1$turnout_2012 <- ifelse(panel1$turnout_2012 == "Sim", 1, 0)
tabyl(panel1$turnout_2012)
panel1$partisan <- ifelse(panel1$partisan == "Sim", 1, 0)
tabyl(panel1$partisan)
panel1$politician_helped <- ifelse(panel1$politician_helped == "Sim", 1, 0)
tabyl(panel1$politician_helped)

##Knowledge and Views on Government and Policies
# edu_rank1 / acc_rank1 flag which of the two answers the respondent gave to
# the edu_contas_rank item.
panel1$edu_rank1 <- ifelse(panel1$edu_contas_rank == "A administração das escolas municipais", 1, 0)
tabyl(panel1$edu_rank1)
panel1$acc_rank1 <- ifelse(panel1$edu_contas_rank == "A administração das contas do município", 1, 0)
tabyl(panel1$acc_rank1)
panel1$tce_knowledge <- ifelse(panel1$tce_knowledge == "Sim", 1, 0)
tabyl(panel1$tce_knowledge)
panel1$ana_knowledge <- ifelse(panel1$ana_knowledge == "Sim", 1, 0)
tabyl(panel1$ana_knowledge)
# Responsibility items are converted to numbers with factor2num()
# (defined in data_cleaning_functions.R, not shown).
panel1$acc_responsible <- factor2num(panel1$acc_responsible)
tabyl(panel1$acc_responsible)
panel1$edu_responsible <- factor2num(panel1$edu_responsible)
tabyl(panel1$edu_responsible)

##Confidence in Institutions
# Only the imputed versions are converted; the *_noimpute copies are left as
# labelled values.
panel1$confid_fedgov <- factor2num(panel1$confid_fedgov)
tabyl(panel1$confid_fedgov)
panel1$confid_justice <- factor2num(panel1$confid_justice)
tabyl(panel1$confid_justice)
panel1$confid_tce <- factor2num(panel1$confid_tce)
tabyl(panel1$confid_tce)
panel1$confid_muni <- factor2num(panel1$confid_muni)
tabyl(panel1$confid_muni)

##Salience of Information about Corruption
tabyl(panel1$info_welfare)
# 1 when the respondent chose the corruption-involvement answer.
panel1$info_corrupt_welfare1 <- ifelse(panel1$info_welfare == "Se ele se envolveu em corrupção",
                                      1, 0)
tabyl(panel1$info_corrupt_welfare1)

##Same Race as mayor
tabyl(panel1$same_race_mayor)
panel1$same_race_mayor <- ifelse(panel1$same_race_mayor == "Sim", 1, 0)
tabyl(panel1$same_race_mayor)

##Source Credibility: Survey
tabyl(panel1$source_cred1)
# 1 when the respondent chose "Um panfleto de uma ONG".
panel1$source_cred1_survey <- ifelse(panel1$source_cred1 == "Um panfleto de uma ONG", 1, 0)
tabyl(panel1$source_cred1_survey)

##Probability of Vote Monitoring
# Coded 1-4 in the order of the listed levels
# (1 = "Não acontecerá de jeito nenhum", 4 = "Muito provável").
# Any value not among the listed levels becomes NA.
tabyl(panel1$prob_vote_monitoring)
panel1$prob_vote_monitoring <- factor(panel1$prob_vote_monitoring,
                                     levels = c("Não acontecerá de jeito nenhum",
                                                "Pouco provável que aconteça",
                                                "Mais ou menos provável",
                                                "Muito provável")) %>%
  as.numeric()
tabyl(panel1$prob_vote_monitoring)

##Probability of Vote Count Not Being Free and Fair
# fct_rev() reverses the listed order before conversion, so
# 1 = "Muito provável" and 4 = "Não será correta de jeito nenhum".
tabyl(panel1$prob_vote_count)
panel1$prob_vote_count <- factor(panel1$prob_vote_count,
                                levels = c("Não será correta de jeito nenhum",
                                           "Pouco provável que seja correta",
                                           "Mais ou menos provável",
                                           "Muito provável")) %>%
  fct_rev() %>%
  as.numeric()
tabyl(panel1$prob_vote_count)

#Combined variable as specified in the metapap
# Sum of the two 1-4 scales above.
panel1$free_fair <- panel1$prob_vote_monitoring + panel1$prob_vote_count

#Sympathy with mayor's party
# NOTE(review): as.numeric() returns the underlying level codes if
# label_impute_nsnr() produced a factor, and NA for non-numeric text if it
# produced a character vector -- confirm which applies.
tabyl(panel1$mayor_pid)
panel1$mayor_pid <- as.numeric(panel1$mayor_pid)
tabyl(panel1$mayor_pid)

# Vote buying uses the same 1-4 coding as vote monitoring.
tabyl(panel1$prob_vote_buying)
panel1$prob_vote_buying <- factor(panel1$prob_vote_buying,
                                     levels = c("Não acontecerá de jeito nenhum",
                                                "Pouco provável que aconteça",
                                                "Mais ou menos provável",
                                                "Muito provável")
                                 ) %>%
  as.numeric()
tabyl(panel1$prob_vote_buying)

## Income
# "Don't know / no answer" is set to missing before numeric conversion.
# NOTE(review): the same as.numeric() caveat as for mayor_pid applies.
tabyl(panel1$income)
panel1$income[panel1$income == "Não sabe / Não respondeu"] <- NA
panel1$income <- as.numeric(panel1$income)

# ---- recode_vars_endline ----

## List Experiments and Vignette Experiment
# Each indicator is 1 when the corresponding questionnaire column (list1,
# list2, carlos) is non-missing for the respondent, 0 otherwise.
panel2$vote_buying_list <- ifelse(is.na(panel2$list1) == FALSE, 1, 0)
tabyl(panel2$vote_buying_list)
panel2$vote_list <- ifelse(is.na(panel2$list2) == FALSE, 1, 0)
tabyl(panel2$vote_list)
panel2$vig_treatment <- ifelse(is.na(panel2$carlos) == FALSE, 1, 0)
tabyl(panel2$vig_treatment)

## Voting Behavior Outcomes
panel2$turnout <- ifelse(panel2$turnout == "Sim", 1, 0)
tabyl(panel2$turnout)
# A valid vote requires turning out and not answering "Branco/Nulo".
panel2$valid_vote <- ifelse(panel2$turnout == 1 & panel2$vote_choice != "Branco/Nulo", 1, 0)
tabyl(panel2$valid_vote)
#Note: For vote choice recodes, we use candidate position on the ballot to identify the candidate
# load() is expected to bring `merge_data` into the workspace; it is matched
# to respondents on municipality (codeibge) and vote_choice == order.
load('./data/cand_merge_data.RData')
merge_data$order <- as.factor(merge_data$order)
panel2 <- left_join(panel2, select(merge_data, codeibge, party, incumbent, winner, order), by = c('codeibge' = 'codeibge', 'vote_choice' = 'order') ) %>%
  rename(vote_party = party, vote = incumbent, vote_winner = winner)
# Blank/null ballots and non-voters are coded 0 on both vote outcomes.
panel2$vote[panel2$vote_choice == "Branco/Nulo"] <- 0
panel2$vote_winner[panel2$vote_choice == "Branco/Nulo"] <- 0
panel2$vote[panel2$turnout == 0] <- 0
panel2$vote_winner[panel2$turnout == 0] <- 0
tabyl(panel2$vote_party)
tabyl(panel2$vote)
tabyl(panel2$vote_winner)
# Vignette outcome: strict version counts only "Grande chance"; the main
# version also counts 'Alguma chance'.  The strict version must be computed
# first because vote_vig is overwritten on the following statement.
panel2$vote_vig_strict <- ifelse(panel2$vote_vig == "Grande chance", 1, 0 )
tabyl(panel2$vote_vig_strict)
panel2$vote_vig <- ifelse(panel2$vote_vig == "Grande chance" | panel2$vote_vig == 'Alguma chance', 1, 0)
tabyl(panel2$vote_vig)

## Beliefs and Knowledge about Incumbent Performance
panel2$acc_rejected_posterior <- ifelse(panel2$acc_rejected_posterior == "Rejeitadas", 1, 0)
tabyl(panel2$acc_rejected_posterior)

# Evaluations are listed best-to-worst and then reversed with fct_rev(), so
# the numeric scale runs 1 = worst ("Péssima"/"Péssimo") to 5 = best
# ("Ótima"/"Ótimo").  Values not among the listed levels become NA.
tabyl(panel2$govt_eval)
panel2$govt_eval <-
  factor(panel2$govt_eval, levels = c("Ótima", "Boa", "Regular", "Ruim", "Péssima")) %>%
  fct_rev() %>%
  as.numeric()
tabyl(panel2$govt_eval)

tabyl(panel2$acc_eval)
panel2$acc_eval <-
  factor(panel2$acc_eval, levels = c("Ótimo", "Bom", "Regular", "Ruim", "Péssimo")) %>%
  fct_rev() %>%
  as.numeric()
tabyl(panel2$acc_eval)
tabyl(panel2$edu_eval)
panel2$edu_eval <- factor(panel2$edu_eval, levels = c("Ótimo", "Bom", "Regular", "Ruim", "Péssimo")) %>%
  fct_rev() %>%
  as.numeric()
tabyl(panel2$edu_eval)

# "NS/NR" is folded into "Muito inseguro(a)" before conversion.  With
# fct_rev() the accounts scale runs 1 = "Muito seguro(a)" to
# 4 = "Muito inseguro(a)".
tabyl(panel2$uncertain_acc_eval)
panel2$uncertain_acc_eval[panel2$uncertain_acc_eval == "NS/NR"] <- "Muito inseguro(a)"
panel2$uncertain_acc_eval <- factor(panel2$uncertain_acc_eval, levels = c("Muito inseguro(a)", "Inseguro(a)",
                                                                         "Seguro(a)", "Muito seguro(a)")) %>%
  fct_rev() %>%
  as.numeric()
tabyl(panel2$uncertain_acc_eval)

# NOTE(review): unlike the accounts version above, this pipeline has no
# fct_rev(), so the education scale runs the opposite way
# (1 = "Muito inseguro(a)", 4 = "Muito seguro(a)").  Confirm which direction
# the downstream analysis expects before relying on either variable.
tabyl(panel2$uncertain_edu_eval)
panel2$uncertain_edu_eval[panel2$uncertain_edu_eval == "NS/NR"] <- "Muito inseguro(a)"
panel2$uncertain_edu_eval <- factor(panel2$uncertain_edu_eval, levels = c("Muito inseguro(a)", "Inseguro(a)",
                                                                         "Seguro(a)", "Muito seguro(a)")) %>%
  as.numeric()
tabyl(panel2$uncertain_edu_eval)

## Importance of Information for Vote Decisions
# info_most_important: the campaign-information answer was given in the first
# top-3 slot.  info_import: it was given in any of the three slots.
panel2$info_most_important <- ifelse(panel2$top3_1 == "O que aprendi durante a campanha sobre o atual prefeito", 1, 0)
tabyl(panel2$info_most_important)
panel2$info_import <- ifelse(panel2$top3_1 == "O que aprendi durante a campanha sobre o atual prefeito"|
                             panel2$top3_2 == "O que aprendi durante a campanha sobre o atual prefeito"|
                             panel2$top3_3 == "O que aprendi durante a campanha sobre o atual prefeito", 1, 0)
tabyl(panel2$info_import)

## Flier Recall
panel2$received_flier <- ifelse(panel2$received_flier == "Sim", 1, 0)
tabyl(panel2$received_flier)

##Candidate Effort
# Coded 1-5 in the listed order (1 = "Muito menos", 5 = "Muito mais").
tabyl(panel2$pol_effort)
panel2$pol_effort <- factor(panel2$pol_effort, levels = c("Muito menos", "Menos", "Igual",
                                                         "Mais", "Muito mais")) %>%
  as.numeric()
tabyl(panel2$pol_effort)

##Candidate Honesty
# Coded 1-4 in the listed order
# (1 = "Não ficaria surpreso(a) de jeito nenhum", 4 = "Muito surpreso(a)").
tabyl(panel2$pol_honesty)
panel2$pol_honesty <- factor(panel2$pol_honesty, levels = c("Não ficaria surpreso(a) de jeito nenhum",
                                                           "Pouco surpreso(a)", "Mais ou menos surpreso(a)",
                                                           "Muito surpreso(a)")) %>%
  as.numeric()
tabyl(panel2$pol_honesty)

##Accounts Surprise
# These indicators are built on the baseline data (panel1), comparing the
# respondent's prior with the actual accounts status:
#   goodsurprise = expected rejection, accounts not rejected
#   badsurprise  = expected no rejection, accounts rejected
#   nosurprise   = neither of the above
# The same three variables are computed again on the merged panel in the
# save_data section.
panel1$acc_goodsurprise <- ifelse(panel1$acc_rejected_prior == 1 & panel1$acc_rejected == 0, 1, 0)
panel1$acc_badsurprise <- ifelse(panel1$acc_rejected_prior == 0 & panel1$acc_rejected == 1, 1, 0)
panel1$acc_nosurprise <- ifelse(panel1$acc_goodsurprise == 0 &
                              panel1$acc_badsurprise == 0, 1, 0)

# ---- attrition ----
# A respondent counts as attrited when the endline `data` column is missing,
# i.e. the questionnaire row found no match in the endline survey file.
panel2$attrited <- ifelse(is.na(panel2$data), "Attrited", "Recontacted")
# Attrition by municipality, printed as a table of row percentages.
panel2 %>%
  crosstab(muni, attrited) %>%
  adorn_crosstab(denom = "row", show_totals = TRUE) %>%
  knitr::kable()

# Attrition shares by census tract.  group_by() is called without grouping
# variables here, so it does not group anything.
attrition_bysetor <- group_by(panel2) %>%
  crosstab(codesetor, attrited, percent = "row")

## Recode to a 0-1 variable
panel2$attrited <- ifelse(panel2$attrited == "Attrited", 1, 0)
# ---- attrited_tracts ----

# Add the municipality name to each tract.  The join repeats each tract once
# per respondent, which unique() collapses again below.
attrition_bysetor <- left_join(attrition_bysetor, select(panel2, codesetor, muni))

# List the tracts where more than half of the respondents attrited.
attrition_bysetor %>%
  unique() %>%
  filter(Attrited > .5) %>%
  knitr::kable()


# ---- recodes_heterogeneity_outcomes ----
# Baseline government evaluation, coded in the listed order without
# reversal: 1 = "Ótima" ... 5 = "Péssima".
# NOTE(review): the endline govt_eval is reversed with fct_rev()
# (5 = "Ótima"), so the baseline and endline scales run in opposite
# directions -- confirm this is intended.
panel1$govt_eval_baseline <- factor(panel1$govt_eval_baseline,
                                   levels = c("Ótima", "Boa", "Regular", "Ruim", "Péssima")) %>%
  as.numeric()
panel1$govt_eval_baseline_noimpute <- factor(panel1$govt_eval_baseline_noimpute,
                                    levels = c("Ótima", "Boa", "Regular", "Ruim", "Péssima")) %>%
  as.numeric()

# Relative well-being: 1 = "Muito pior" ... 5 = "Muito melhor".
panel1$relative_wellbeing <- factor(panel1$relative_wellbeing,
                                    levels = c("Muito pior", "Pior", "Igual", "Melhor", "Muito melhor")) %>%
  as.numeric()


# ---- save_data ----
##
# Drop the endline interview metadata columns before merging.
panel2 <- select(panel2,  -data, -hora_1, -hora_2, -pesq)


baseline <- panel1
# The baseline questionnaire read codesetor as character; convert it to
# numeric here (NOTE(review): presumably to match the endline column type so
# the join below can use it as a key -- confirm).
baseline$codesetor <- as.numeric(baseline$codesetor)
endline <- panel2

# No `by` is given: the two waves are joined on every column they share.
panel <- left_join(baseline, endline)

## Eliminate Setores that were unable to be completed
## Eliminate Tacaimbo because incumbent failed to rerun
# codesetor is numeric at this point and is compared against character codes;
# %in% coerces both sides to a common type for the comparison.
panel <- filter(panel, (codesetor %in% c("260560810000001", "260560810000002",
                                         "261255405000008", "261255405000007") == FALSE) &
                  muni != "Tacaimbó")

# acc_correct: 1 when the endline belief about rejection matches the actual
# status, 0 otherwise.
panel$acc_correct <- 1 - abs(panel$acc_rejected_posterior - panel$acc_rejected)
# ana_correct: 184 minus the absolute gap between the endline belief and the
# actual rank, so larger values mean a more accurate belief.
# NOTE(review): 184 is hard-coded; presumably the largest possible gap.
panel$ana_correct <- 184 - abs(panel$ana_posterior - panel$ana_rank)
# Surprise indicators comparing the baseline prior with the actual status.
panel$acc_anysurprise <- ifelse(panel$acc_rejected_prior != panel$acc_rejected, 1, 0)
panel$acc_goodsurprise <- ifelse(panel$acc_rejected_prior == 1 &
                                   panel$acc_rejected == 0, 1, 0)
panel$acc_badsurprise <- ifelse(panel$acc_rejected_prior == 0 &
                                  panel$acc_rejected == 1, 1, 0)
panel$acc_nosurprise <- ifelse(panel$acc_goodsurprise == 0 &
                                 panel$acc_badsurprise == 0, 1, 0)

##Merge in Political Competition Data for use as a moderator
# NOTE(review): elec_compet is read and joined to the `br` lookup (expected
# to come from data_rosettastone.RData), but it is never joined onto `panel`
# anywhere in this script, so it does not reach the written CSV.
elec_compet <- read_csv("./data/competitiveness_2012.csv")
elec_compet$SIGLA_UE <- as.character(elec_compet$SIGLA_UE)

load("./data/data_rosettastone.RData")
elec_compet <- left_join(elec_compet, br, by = c("SIGLA_UE" = "codetse"))

##Drop Variables Not Used in the Analysis

# Names of the columns removed from the merged panel before it is written out.
# Names that are not present in `panel` are simply ignored by any_of() below.
dropped_var <- c("data",
                 "hora_1",
                 "sex",
                 "edu_contas_rank",
                 "info_welfare",
                 "source_cred1",
                 "treat_quest",
                 "pesq",
                 "hora_2",
                 "good_accounts",
                 "acc_goodsurprise",
                 "acc_badsurprise",
                 "acc_nosurprise",
                 "list1",
                 "list2",
                 "carlos",
                 "area1",
                 "area2",
                 "list_exp_vote",
                 "decision_factors",
                 "top3_1",
                 "top3_2",
                 "top3_3",
                 "govt_eval",
                 "pol_honesty",
                 "good_schools1",
                 "good_schools2",
                 "good_schools3",
                 "acc_rejected_posterior",
                 "ana_posterior",
                 "acc_backlash",
                 "ideb_heard",
                 "ideb_heard_from1",
                 "ideb_heard_from2",
                 "ideb_heard_from3",
                 "ideb_goal_met",
                 "received_flier",
                 "heard_others_flier",
                 "flier_openended1",
                 "flier_openended2",
                 "flier_openended3",
                 "uncertain_acc_eval",
                 "uncertain_edu_open",
                 "uncertain_acc_open",
                 "acc_eval",
                 "vote_buying_list",
                 "vote_list",
                 "vote_party",
                 "vote_winner",
                 "info_most_important",
                 "acc_correct",
                 "acc_anysurprise")

# any_of() replaces the superseded one_of(): it selects the same existing
# columns and tolerates names that are absent.
panel <- panel %>%
  select(-any_of(dropped_var))


# Recode factor covars to numeric -----------------------------------------

# Replace the two "don't know / no answer" labels with NA.
#
# x: a character vector or factor of response labels.
# Returns x with every "Não sabe / Não respondeu" or "NS/NR" entry set to NA;
# all other entries (and, for factors, the level set) are left as they were.
recode_nsnr <- function(x) {
  nsnr_labels <- c("Não sabe / Não respondeu", "NS/NR")
  is_nsnr <- x %in% nsnr_labels
  x[is_nsnr] <- NA
  x
}

# Return the most frequent value of x as a character string.
#
# Missing values are not counted (table() drops them by default).  The result
# is the name of the first entry after sorting the counts in decreasing order.
get_mode <- function(x) {
  counts <- table(x)
  ranked <- sort(counts, decreasing = TRUE)
  top <- ranked[1]
  names(top)
}

# Fill in missing values of x using the other observations in the same block.
#
# x:     a character vector, factor, or numeric vector with missing values.
# block: grouping vector of the same length as x (e.g. census tract).
#
# Character/factor input: missing entries receive the most frequent value of
# their block, as computed by get_mode().
# Numeric input: missing entries receive the mean of the non-missing values in
# their block; if a block has no non-missing values, the overall mean of x is
# used instead.
#
# Returns x with missing entries replaced; observed entries are unchanged.
# Any other input type raises an error.
impute <- function(x, block) {
  if (is.character(x) || is.factor(x)) {
    imputed <- ave(x, block, FUN = get_mode)
  } else if (is.numeric(x)) {
    # na.rm has to be set inside FUN: every extra argument given to ave()
    # itself is treated as another grouping variable, not forwarded to FUN.
    imputed <- ave(x, block, FUN = function(v) mean(v, na.rm = TRUE))
    # Blocks with no observed value yield NaN; fall back to the overall mean.
    imputed[is.na(imputed)] <- mean(x, na.rm = TRUE)
  } else {
    stop("impute() supports character, factor and numeric vectors only",
         call. = FALSE)
  }
  # is.na() is TRUE for both NA and NaN.
  missing <- is.na(x)
  x[missing] <- imputed[missing]
  x
}

panel$politics_interest <- as_factor(panel$politics_interest) %>%
  fct_relevel("Não sabe / Não respondeu", "Nada interessado(a)", "Pouco interessado(a)",
              "Interessado(a)", "Muito interessado(a)") %>%
  recode_nsnr() %>%
  fct_drop() %>%
  impute(panel$codesetor) %>%
  as.numeric()

panel$turnout_2014 <- as_factor(panel$turnout_2014) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_relevel("Não", "Sim") %>%
  as.numeric()

panel$vote_2014 <- as_factor(panel$vote_2014) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_lump(prop = .05)

panel$acc_eval_baseline <- as_factor(panel$acc_eval_baseline) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_relevel("Péssimo", "Ruim", "Regular", "Bom", "Ótimo") %>%
  as.numeric()

panel$edu_eval_baseline <- as_factor(panel$edu_eval_baseline) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_relevel("Péssimo", "Ruim", "Regular", "Bom", "Ótimo") %>%
  as.numeric()

panel$tce_prior_cert <- as_factor(panel$tce_prior_cert) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_relevel("Muito inseguro(a)", "Inseguro(a)", "Seguro(a)", "Muito seguro(a)") %>%
  as.numeric()

panel$edu_prior_uncert <- as_factor(panel$edu_prior_uncert) %>%
  recode_nsnr() %>%
  impute(panel$codesetor) %>%
  fct_drop() %>%
  fct_relevel("Muito inseguro(a)", "Inseguro(a)", "Seguro(a)", "Muito seguro(a)") %>%
  as.numeric()

panel$religion <- recode_nsnr(panel$religion) %>%
  impute(panel$codesetor) %>%
  fct_lump(prop = .05)

panel$race <- recode_nsnr(panel$race) %>%
  impute(panel$codesetor) %>%
  fct_lump(prop = .05)

panel$muni_biggest_prob <- recode_nsnr(panel$muni_biggest_prob) %>%
  impute(panel$codesetor) %>%
  fct_lump(prop = .05)

# Fix error in imputation: overwrite child_school with 0 for the rows whose
# qnum appears in the list below; all other rows keep their current value.
panel$child_school <- replace(
  panel$child_school,
  panel$qnum %in% c(
    3L, 320L, 731L, 734L, 755L, 1355L,
    1357L, 1787L, 2092L, 2094L, 2982L
  ),
  0
)


# write data to csv -------------------------------------------------------

# Write the cleaned panel to disk. The output file is passed positionally
# because readr renamed this argument from `path` to `file` in 1.4.0 (naming
# `path` now raises a deprecation warning), while the readr 1.3.1 recorded in
# the session information below does not recognise `file`. The positional
# form works with both.
write_csv(panel, "./data/panel_cleaned.csv")

# > sessionInfo()
# R version 3.6.3 (2020-02-29)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Ubuntu 18.04.4 LTS
#
# Matrix products: default
# BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
# LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
#
# locale:
#   [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
# [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_US.UTF-8       LC_NAME=C
# [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#
# attached base packages:
#   [1] stats     graphics  grDevices utils     datasets  methods   base
#
# other attached packages:
#   [1] janitor_2.0.1   haven_2.3.1     forcats_0.5.0   stringr_1.4.0   dplyr_1.0.0     purrr_0.3.4     readr_1.3.1
# [8] tidyr_1.1.0     tibble_3.0.1    ggplot2_3.3.2   tidyverse_1.3.0
#
# loaded via a namespace (and not attached):
#   [1] Rcpp_1.0.4.6     cellranger_1.1.0 pillar_1.4.4     compiler_3.6.3   dbplyr_1.4.4     tools_3.6.3
# [7] packrat_0.5.0    lubridate_1.7.9  jsonlite_1.6.1   lifecycle_0.2.0  nlme_3.1-147     gtable_0.3.0
# [13] lattice_0.20-41  pkgconfig_2.0.3  rlang_0.4.6      reprex_0.3.0     cli_2.0.2        DBI_1.1.0
# [19] rstudioapi_0.11  parallel_3.6.3   withr_2.2.0      xml2_1.3.2       httr_1.4.1       fs_1.4.1
# [25] generics_0.0.2   vctrs_0.3.1      hms_0.5.3        grid_3.6.3       tidyselect_1.1.0 snakecase_0.11.0
# [31] glue_1.4.1       R6_2.4.1         fansi_0.4.1      readxl_1.3.1     modelr_0.1.8     blob_1.2.1
# [37] magrittr_1.5     backports_1.1.8  scales_1.1.1     ellipsis_0.3.1   rvest_0.3.5      assertthat_0.2.1
# [43] colorspace_1.4-1 stringi_1.4.6    munsell_0.5.0    broom_0.5.6      crayon_1.3.4
